clear
gen x = .
tempfile empty
save `empty', replace
set type double, permanently


// globals only relevant for this do file---Global switches to turn on/off sections of the do file
	global Data_GIS_section0 						"0" // clean massive distance data. looks dumb but i need to do this in my version of stata.
	global Data_GIS_section0_reshapelong 			"0"
	
	global Data_GIS_section0_5 						"1" // utility name masterlist for the shapefiles. correspondence with EIAID and OpenEI based data.
	
	global Data_GIS_section1 						"1" 
	global Data_GIS_section1_alt_stations 			"1" // alternative stations (i.e. EV stations) data
	// section2: creating dataset based on diff matching/mapping approach
	global Data_GIS_section2_spatial_RD 			"1"  // matching CBG pairs by distance across border.  Generates EVSales_Panel_Data_distance_matched_CBG.dta for spatial RD
	global Data_GIS_section2_pm 					"1" // Generates  psmatched_panel_EV_utility.dta for panel data regressions
	global Data_GIS_const_pm_25						"1"


if $Data_GIS_section0 == 1 {
	forvalues i = 1/12 {
		local counter = (`i'-1)* 2000
				import delimited "${Boundaries}/distance_bg_2013_sub`i'.csv", clear varnames(1)
		local endofloop = 2000
		if "`i'" == "12" local endofloop = 1212
		forvalues index = 1/`endofloop' {
			local index_adj = `counter' + `index'
			qui: ren x`index' x`index_adj'
		}
		di "renaming done"
		save "${Data_Clean}/distance_bg_`i'.dta", replace
	}
}


if $Data_GIS_section0_reshapelong == 1 {
	forvalues i = 1/12 {
		local counter = (`i'-1)* 2000
		local endofloop = 2000
		if "`i'" == "12" local endofloop = 1212
		use "${Data_Clean}/distance_bg_`i'.dta", clear
		local cutoff = 25*1609
		forvalues index = 1/`endofloop' {
			local index_adj = `counter' + `index'
			replace x`index_adj' = . if x`index_adj' > `cutoff'|  x`index_adj' == 0
			
		}
		gen colid = _n
		egen rowmax = rowmax(x*) 
		drop if rowmax == 0
		drop rowmax
		
		// now break the data into bits and reshape long
		local increment = 200
		local begin = 1 + `counter'
		local end = `increment'+ `counter'
		local segmentNum = 0
		local endofSegmentLoop = `counter' + `endofloop'
		while `end' <=  `endofSegmentLoop' {
			preserve 
				local segmentNum = `segmentNum' + 1
				di "keep colid x`begin'-x`end'"
				keep colid x`begin'-x`end'
				reshape long x, i(colid) j(colid_adj)
				drop if x == .
				tempfile reshape_section`segmentNum'
				save `reshape_section`segmentNum'', replace
				local begin = `begin' + `increment'
				local end = `end' + `increment'
				if("`i'" == "12") & (`begin' < `endofSegmentLoop') &  (`end' > `endofSegmentLoop') local end = `endofSegmentLoop'
			restore
		}
		use `reshape_section1', clear
		forvalues j = 2/`segmentNum' {
			append using `reshape_section`j''
		}
		save "${Data_Clean}/distance_bg_under25mi_`i'.dta", replace
	}
}


if $Data_GIS_section0_5 == 1 {
	use "${Data_Clean}/Electricity_rate_data_panel_collapsed", clear

	drop utility rate_name source mostrecent_label // variables dropped to make data light
	compress
	
	tempfile electricity_panel
	save `electricity_panel', replace
	
	// masterlist crosswalk of different utility names and the EIAID (From OpenEI data base)
	import excel "${ElectricityRates}/utility_names_eiaid_masterlist.xlsx", clear firstrow
	drop utility // utility name in the openEI data.
	tempfile utility_name_masterlist
	save `utility_name_masterlist', replace
	
	import excel cbg=A cbg_name=B mean_income=C population=E census_bg_area=G rowid=H utility_name=J using ///
		"${Boundaries}/overlap_boundaries_proper_new.xlsx", clear cellrange(A2)
	destring cbg, replace force
	duplicates tag cbg, gen(overlap)
	drop if overlap !=0
	drop overlap
	replace census_bg_area = census_bg_area/(1609^2)

	// create correspondence of various utility names and eiaids
	merge m:1 utility_name using  `utility_name_masterlist'
	tab _merge
	keep if _merge == 3
	drop _merge
	tempfile utility_bg_masterlist 
	save `utility_bg_masterlist', replace

	// Import Census Tracts to Zip Codes map and save tempfile
	import delimited "${Boundaries}/join_ZCTA_bg.csv", clear varnames(1)
	keep geoidx namex geoidy namey
	destring geoidy, force replace
	gen colid = _n
	ren geoidx cbg
	ren geoidy zipcode_primary
	ren namex cbg_name 
	ren namey zipcode_primary_name
	
	tempfile ZipMap
	save `ZipMap', replace
	
	// PGE service regions and zip codes
	import excel "${Boundaries}/RESZIPS.XLS", clear cellrange(A2:E1220) firstrow
	*tostring ZIPCODE, format(%12.0f) replace
	ren ZIPCODE zipcode_primary
	
	gsort zipcode_primary -Percentage
	by zipcode_primary: keep if _n == 1
	ren BaselineTerritory region
	keep zipcode_primary region
	isid zipcode_primary
	tempfile zip_region_PGE
	save `zip_region_PGE', replace
	
	// merge census-tract zip to zip region PGE
	use `ZipMap', clear
	merge m:1 zipcode_primary using `zip_region_PGE', keep(1 3) // include zips/ZCTA for other merges
	drop _merge
	isid cbg
	drop cbg_name zipcode_primary_name
	tempfile bg_PGETerritory
	save `bg_PGETerritory', replace
	
	// gas data
	use "${Restricted_Data}/OPIS/zip_month_avg_retail_price.dta", clear
	
	ren mailingzip zipcode_primary
	
	merge 1:1 zipcode_primary year month using "${Restricted_Data}/OPIS/GasPriceRadii.dta"
	
	tempfile GasData
	save `GasData', replace
	
	// CBG to climate zone crosswalk
	import delimited "${Boundaries}/CBG_climate_zone.csv", clear varnames(1)
	tempfile CBG_climate_zone
	save `CBG_climate_zone', replace
	
	// CBG level DMV data from 2013
	import delimited "${Restricted_Data}/DMV/CBG_dmv_2013.csv", clear varnames(1)
	drop year
	la var fuel_economy_mean "CBG 2013: mean fuel economy"
	la var standard_count "CBG 2013: standard vehicle count"
	la var luxury_count "CBG 2013: luxury vehicle count"
	la var total_car_count "CBG 2013: total car count"
	la var percent_luxury "CBG 2013: percent luxury"
	la var hybrid_count "CBG 2013: hybrid count"
	la var prius_count "CBG 2013: Prius count"
	tempfile CBG_dmv_2013
	save `CBG_dmv_2013', replace
	
	// MUD data at CBG level
	import delimited "${ACS}/CBG_MUDs.csv", clear varnames(1)
	ren total total_hh
	tempfile CBG_MUDs
	save `CBG_MUDs', replace
	
	
	// Median Income at CBG level 
	import delimited "${ACS}/med_income_bg_2013.csv", clear varnames(1)
	keep geoid 
	ren geoid cbg 
	gen colid = _n
	tempfile colid_to_cbg
	save `colid_to_cbg', replace
}


if $Data_GIS_section1 == 1 {
	
	use "${Restricted_Data}/Experian/ExperianMaster.dta", clear
	duplicates drop
	preserve
		ren PurchasePrice SellPrice
		drop if SellPrice == . | SellPrice == 0
		drop month year
		gen month = month(PurchaseDate)
		gen year = year(PurchaseDate)
		keep if OwnerState == "CA"
		gen SaleNumber = 1
		tab VehicleType
		collapse (sum) SaleNumber, by(year VehicleType)
		gen VehicleType_alt = 0 if VehicleType == "Conventional"
		replace VehicleType_alt = 1 if VehicleType == "BEV"
		replace VehicleType_alt = 2 if VehicleType == "PHEV"
		replace VehicleType_alt = 3 if VehicleType == "Hybrid"
		replace VehicleType_alt = 4 if VehicleType == "OTHER"
		
		drop VehicleType
		reshape wide SaleNumber, i(year) j(VehicleType_alt)
		la var SaleNumber1 "BEV"
		la var SaleNumber2 "PHEV"
		drop if year <=2010
		graph bar SaleNumber1 SaleNumber2, over(year) stack graphregion(color(white)) ///
			title("BEV/PHEV transactions in CA") legend(label(1 "BEV") label(2 "PHEV"))
		graph export "${ResultsOut}/Experian_NumSales.png",  as(png) replace

	restore
	/*
	** set date, cut out data before 2010
	local StartDate = date("January 1 2014", "MDY")
	local EndDate = date("December 31 2018", "MDY")
	keep if PurchaseDate >= `StartDate' & PurchaseDate <= `EndDate'
	*/
	replace NewUsedIndicator = "N" if New==1
	replace NewUsedIndicator = "U" if New==0
	ta VehicleType
	ren PurchasePrice SellPrice
	gen SaleNumber = 1
	drop if SellPrice == . | SellPrice == 0
	drop month year
	gen month = month(PurchaseDate)
	gen year = year(PurchaseDate)
	gen ym = ym(year, month)
	format ym %tm
	drop year month
	duplicates tag VIN PurchaseDate, gen(tag)
	bys VIN PurchaseDate: egen nvalsGroup = nvals(VehicleGroup)
	drop if tag > 0 & nvalsGroup > 1 & regexm(VehicleModel, "Prius") == 1 & VehicleGroup ==  "GROUP 2 - Non-ZEV"
	drop nvalsGroup tag
	duplicates tag VIN PurchaseDate, gen(tag)
	* tab tag // from herre not clear how to reduce redundancy
	replace ReportingPeriod = ReportingPeriod + "01"
	gen ReportingDate = date(ReportingPeriod, "YMD")
	gen DateLag = abs(PurchaseDate - ReportingDate)
	bys VIN PurchaseDate (DateLag): keep if _n == 1
	
	// clean Experian CBG crosswalk
	preserve
		use "${Restricted_Data}/Experian/experian_CBG_xwalk.dta", clear
		duplicates tag VIN PurchaseDate, gen(tag)
		bys VIN PurchaseDate (CBG_combined): keep if _n == _N
		tempfile CBG_xwalk_mod
		save `CBG_xwalk_mod', replace
	restore
	merge 1:1 VIN PurchaseDate using `CBG_xwalk_mod'
	tab VehicleType _merge if TitleState == "CA" 
	// replace CGB with CBG crosswalk data
	destring CBG_combined, replace
	
	replace OwnerCensusBlockGroup = CBG_combined if mi(OwnerCensusBlockGroup) == 1
	drop if mi(OwnerCensusBlockGroup) == 1
	drop if mi(VehicleType) == 1
	
	collapse (mean) SellPrice (sum) SaleNumber, by(OwnerCensusBlockGroup ym VehicleType)
	gen VehicleType_alt = 0 if VehicleType == "Conventional"
	replace VehicleType_alt = 1 if VehicleType == "BEV"
	replace VehicleType_alt = 2 if VehicleType == "PHEV"
	replace VehicleType_alt = 3 if VehicleType == "Hybrid"
	replace VehicleType_alt = 4 if VehicleType == "OTHER"
	drop VehicleType
	tab VehicleType_alt
	
	// create obs for missing years and months
	reshape wide SellPrice SaleNumber, i(OwnerCensusBlockGroup VehicleType_alt) j(ym)
	reshape long SellPrice SaleNumber, i(OwnerCensusBlockGroup VehicleType_alt) j(ym)
	replace SellPrice = . if SellPrice == 0
	replace SaleNumber = 0 if SaleNumber ==.
	
	reshape wide SellPrice SaleNumber, i(OwnerCensusBlockGroup ym) j(VehicleType_alt)
	forvalues i = 0/4 {
		replace SellPrice`i' = . if SellPrice`i' == 0
		replace SaleNumber`i' = 0 if SaleNumber`i' ==.
	}
	ren OwnerCensusBlockGroup cbg
	
	gen year = yofd(dofm(ym))
	gen month = month(dofm(ym))
	drop ym
	tab month year
	la var SellPrice0 "Average sale price: Conventional"
	la var SellPrice1 "Average sale price: BEV"
	la var SellPrice2 "Average sale price: PHEV"
	la var SellPrice3 "Average sale price: Hybrid"
	la var SellPrice4 "Average sale price: Other"
	la var SaleNumber0 "Number sold: Conventional"
	la var SaleNumber1  "Number sold: BEV"
	la var SaleNumber2  "Number sold: PHEV"
	la var SaleNumber3  "Number sold: Hybrid"
	la var SaleNumber4  "Number sold: other"
	egen Sale = rowtotal(SaleNumber?)
	la var Sale  "Number sold: all types"
	sort cbg year month
	order cbg year month
	drop if cbg == 0
	keep cbg year month SellPrice1 SellPrice2 SaleNumber1 SaleNumber2
	save "${Data_Clean}/Experian_at_bg_level.dta", replace
	preserve
		collapse Sell* Sale*, by(month year)
		gen ym = ym(year, month)
		format ym %tm
		tsset ym
		tsline Sale*
	restore


}
if $Data_GIS_section1_alt_stations == 1 {
	import excel "${Boundaries}/join_cbg_to_alt_stations.xlsx", clear firstrow
	keep if State == "CA"
	keep if ïFuelTypeCode == "ELEC"
	keep ID OpenDate EVConnectorTypes GEOID
	destring GEOID, replace force
	ren GEOID cbg
	gen opendate = date(OpenDate, "YMD")
	format opendate %td
	drop OpenDate
	gen year = year(opendate)
	gen month = month(opendate)
	gen ymopen = ym(year, month)
	format ymopen %tm // jan 2014 is 648, dec 2017 is 695.
	forvalues t = 648/695 {
		bys cbg: egen station_count`t'_temp = count(cbg) if ymopen <= `t' & !mi(ymopen)
		bys cbg: egen station_count`t' = max(station_count`t'_temp)
		replace station_count`t' = 0 if mi(station_count`t') == 1
		drop station_count`t'_temp
	}
	bys cbg: egen total_station_temp = count(cbg)
	bys cbg: egen total_station = max(total_station_temp)
	drop total_station_temp
	keep cbg station_count* total_station
	drop if mi(cbg)
	duplicates drop
	reshape long station_count, i(cbg total_station) j(ym)
	format ym %tm
	gen date = dofm(ym)
	format date %td
	gen year = year(date)
	gen month = month(date)
	drop ym date
	order cbg total_station year month
	save "${Data_Clean}/alt_stations_panel.dta", replace
	
	keep cbg total_station
	duplicates drop 
	isid cbg
	save "${Data_Clean}/alt_stations_total_xsection.dta", replace
}
if $Data_GIS_section2_spatial_RD == 1 {
	// rowid is utility id based on utility shapefile orders, _adj for matched adjacent utility
	// colid (as in column id) is the cbg id, also taken from the cbg shapefiles. same deal for _adj.
	
	use "${Boundaries}/cbg_boundary_matched.dta", clear
	ren id groupid // groupid --- utility pair id
	ren ref rowid
	ren adj rowid_adj
	ren adj_cbg cbg_adj
	ren ref_cbg cbg
	ren distance_to_adj distance_util_adj
	ren distance_to_ref distance_util
	ren distance distance_btw_cbg
	foreach var in cbg cbg_adj rowid rowid_adj groupid {
		destring `var', replace
	} 
	// some obs are missing matches, so drop them
	drop if mi(cbg) == 1| mi(cbg_adj) == 1
	
	// now merge other cbg-level variables
	preserve
		use `utility_bg_masterlist', clear
		gen pop_density = population/census_bg_area
		la var pop_density "pop density (ppl/sq mile)"
		la var cbg "census block group (ACS)"
		order cbg 
		tempfile collevel
		save `collevel', replace
		foreach var of varlist cbg-pop_density {
			ren `var' `var'_adj
		}
		tempfile collevel_adj
		save `collevel_adj', replace
	restore

	merge m:1 cbg using `collevel', nogen keep(1 3)
	merge m:1 cbg_adj using `collevel_adj', nogen keep(1 3)
	

	summ groupid
	levelsof groupid, local(groupid)
	foreach i of local groupid {
		preserve
			keep if groupid == `i'
			foreach varbit in rowid distance_util cbg ///
				mean_income population census_bg_area pop_density ///
				utility_name eiaid {
				ren `varbit'_adj `varbit'1
				ren `varbit' `varbit'0
			}
			gen group_pair_id = _n
			reshape long rowid distance_util cbg ///
				mean_income population census_bg_area pop_density ///
				utility_name eiaid, i(distance_btw_cbg groupid group_pair_id) j(adjacent)
			gen years = 4
			gen months = 12
			expand years
			gen year = 2013
			bys groupid group_pair_id cbg: gen n = _n
			replace year = year + n
			
			expand months
			bys groupid group_pair_id cbg year: gen month = _n 
			assert month <=12
			drop years months
			isid groupid group_pair_id cbg year month 
			
			merge m:1 cbg year month using "${Data_Clean}/Experian_at_bg_level.dta", keep(1 3)
			
			
			merge m:1 eiaid year month using `electricity_panel', keep(1 3) nogen
		
			
			merge m:1 cbg using `bg_PGETerritory', keep(1 3) nogen
	
			replace region = "" if eiaid != 14328
			
			foreach var of varlist Tier?UsageAmt MaxTierUsageAmt pastlastupdate	{
				foreach letter in P R S T V X {
					replace `var' = `var'_`letter' if region == "`letter'" & eiaid == 14328 
				}
			}	
			drop AvgRateTier?_? Tier?UsageAmt_? MaxTierUsageAmt_? HighestTierAvgRate_? pastlastupdate_?
			la var region "utility region (PGE)"
			

			compress
			drop n _merge
			save "${Data_Clean}/matched_data_group`i'.dta", replace
		restore
	}
	use `empty', clear
	foreach j of local groupid {
		append using "${Data_Clean}/matched_data_group`j'.dta"
		erase "${Data_Clean}/matched_data_group`j'.dta"
	}
	// merge climate zones data, and use it to identify max usage for SDGE and SCE
	merge m:1 cbg using `CBG_climate_zone', keep(1 3) nogen
	forvalues i = 0/3 {
		replace Tier`i'UsageAmt =  Tier`i'UsageAmt_Coastal if utility == "San Diego Gas & Electric" & (climate_zone == "6" | climate_zone == "8")
		replace Tier`i'UsageAmt =  Tier`i'UsageAmt_Inland if utility == "San Diego Gas & Electric" & climate_zone == "10"
		replace Tier`i'UsageAmt =  Tier`i'UsageAmt_Mountain if utility == "San Diego Gas & Electric" & climate_zone == "14"
		replace Tier`i'UsageAmt =  Tier`i'UsageAmt_Desert if utility == "San Diego Gas & Electric" & climate_zone == "15"
	}
	drop Tier?UsageAmt_Coastal Tier?UsageAmt_Inland Tier?UsageAmt_Mountain Tier?UsageAmt_Desert
	
	// now merge gas price data and MUDs
	merge m:1 zipcode_primary year month using `GasData', keep(1 3) nogen
	merge m:1 cbg using `CBG_dmv_2013', keep(1 3) nogen
	merge m:1 cbg using `CBG_MUDs', keep(1 3) nogen
	drop x 
	la var groupid "Adjacent Utility pair ID"
	la var group_pair_id "Within utility-pair, CBG-pair ID"
	la var cbg "Census Block Group ID"
	la var adjacent "1 if observation is in adjacent utility"
	la var distance_btw_cbg "distance between matched CBGs"
	la var distance_util "distance to the utility border with the adjacent, paired utility"
	la var rowid "utility ID (row order) in the utility shapefile"
	la var mean_income "mean household income"
	la var population "population"
	la var census_bg_area "CBG area (sq. mile)"
	la var pop_density "population/sq. mile"
	la var eiaid "EIA's utility number"
	la var year "Year"
	la var month "Month"
	la var ref_count "Number of CBGs from the reference utility"
	la var adj_count "Number of CBGs from the adjacent utility for a given ref utility"
	la var HighestTierAvgRate "Weekday avearge top tier rate"
	la var zipcode_primary "Zipcode with which CBG has greatest overlap in area"
	forvalues i = 0/4 {
		la var Tier`i'UsageAmt "KWh/month used to get to tier `i'"
	}
	la var MaxTierUsageAmt "KWh/month used to get to the top tier"

	la var manually_collected "Utility rates manually collected"
	order groupid group_pair_id adjacent cbg year month distance_btw_cbg distance_util 
	save "${Data_Clean}/EVSales_Panel_Data_distance_matched_CBG.dta", replace
}


if $Data_GIS_section2_pm == 1 {
	use `utility_bg_masterlist', clear
	
	gen pop_density = population/census_bg_area
	la var pop_density "pop density (ppl/sq mile)"
	la var cbg "census block group (ACS)"
	
	merge 1:1 cbg using "${Data_Clean}/alt_stations_total_xsection.dta", keep(1 3) gen(_merge_stations)
	drop _merge* 
	replace total_station = 0 if mi(total_station) == 1 
	
	merge m:1 cbg using `colid_to_cbg', keep(1 3) nogen
	
	tempfile masterlist_augmented
	save `masterlist_augmented', replace
	levelsof rowid, local(rowid_local)
	
	local reshapevars "cbg colid rowid eiaid bay_area_muni greater_la_muni central_valley_muni utility_name cbg_name mean_income population pop_density total_station"
	preserve
		foreach var in `reshapevars' {
			ren `var' `var'1
		}
		tempfile masterlist_augmented_adj
		save `masterlist_augmented_adj', replace
	restore
	
	foreach utility_exclude in none PGE SCE LADWP {
		local PGE_eiaid = 14328
		local SCE_eiaid = 17609
		local LADWP_eiaid = 11208
		foreach nn in 1 /*3 5 */ { // number of nearest matches
			preserve
				// nearest neighbor matching, based on 
				forvalues k = 1/`nn' {
					foreach var in `reshapevars' {
						gen `var'`k' = .
					}
					tostring utility_name`k', replace
					tostring cbg_name`k' , replace
				}
				keep if !mi(mean_income) & !mi(eiaid)
				
				
				sort eiaid cbg
				// drop certain utilities.
				if "`utility_exclude'" != "none" {
					drop if eiaid == ``utility_exclude'_eiaid'
				}
				
				levelsof rowid, local(rowid_local_subset)
				foreach i of local rowid_local_subset {
					gen rowid_`i' = rowid == `i' if !mi(rowid) 
					psmatch2 rowid_`i' mean_income pop_density total_station, common neighbor(`nn')
					
					sort _id
					forvalues k = 1/`nn' {
						foreach var in `reshapevars' {
							replace `var'`k' = `var'[_n`k'] if rowid_`i' == 1
						}
					}
					if `nn' == 1 & "`utility_exclude'" == "none" {
						tempfile pscore`i'
						save `pscore`i'', replace
					}
					drop _pscore-_pdif 
					drop rowid_`i'
				}
				drop census_bg_area
				egen nonmiss = rownonmiss(cbg?)
				drop if nonmiss == 0
				drop nonmiss
				forvalues k = 1/`nn' {
					la var colid`i' "cbg id based on the row number in shapefile, match `k' out of `nn'" 
					la var rowid`i' "utility id for matched CBG based on the row number in shapefile, match `k' out of `nn'" 
					la var cbg`k' "Matched CBG, match `k' out of `nn'"
					la var cbg_name`k' "Matched CBG's name, match `k' out of `nn'"
					la var population`k'  "Matched CBG's population, match `k' out of `nn'"
					la var eiaid`k' "EIAID of matched CBG, match `k' out of `nn'"
					la var bay_area_muni`k' "CBG is a Bay Area muni, match `k' out of `nn'"
					la var greater_la_muni`k' "CBG is a greater LA muni, match `k' out of `nn'"
					la var central_valley_muni`k' "CBG is a Central Valley muni, match `k' out of `nn'"
					la var mean_income`k' "mean income of matched CBG, match `k' out of `nn'"
					la var pop_density`k' "pop density of matched CBG, match `k' out of `nn'"
					la var utility_name`k' "Utility name of matched CBG, match `k' out of `nn'"
					la var total_station`k' "Num charging stations of matched CBG, match `k' out of `nn'"
				}
				isid cbg
				if "`utility_exclude'" == "none" {
					save "${Data_Clean}/propensity_matched_CBG_list_nn`nn'.dta", replace
				}
				else {
					save "${Data_Clean}/propensity_matched_CBG_list_nn`nn'_excl_`utility_exclude'.dta", replace
				}
				
			restore
		} // end of nearest neighbor match number loop
	} // end loops on excluding utilities
	
	
	// add distance measrue to the ps matched data, and merge utility and EV data.
	
		use "${Data_Clean}/propensity_matched_CBG_list_nn1.dta", clear


		order colid colid1
		sort colid1 colid
		keep colid1 colid
		forvalues i = 1/12 {
			preserve
			local begin = (`i'-1)* 2000 + 1
			local end = (`i')* 2000 
			if "`i'" == "12" local end = 23212
			di "`begin' to `end'"
			keep if colid1 >= `begin' & colid1 <= `end'
			tempfile psmatch_pairs`i'
			save `psmatch_pairs`i'', replace
		
			use "${Data_Clean}/distance_bg_`i'.dta", clear
			gen colid = _n
			merge 1:1 colid using `psmatch_pairs`i'', keep(3) nogen
			
			gen distance_psmatch = .
			gen k = _n
			summ k
			local max_k = `r(max)'
			forvalues k = 1/ `max_k' {
				local j = colid1[`k']
				di `j'
				replace distance_psmatch = x`j' if k == `k'
			}
			keep colid colid1 distance_psmatch
			
			di "file `i' done"
			tempfile distance_psmatch_`i'
			save `distance_psmatch_`i'', replace
			restore
		}
		use `distance_psmatch_1', clear
		forvalues i = 2/12 {
			append using `distance_psmatch_`i''
		}
		la var distance_psmatch "Distance between CBGs, ps matched sample"
		tempfile psmatch_distance_final
		save `psmatch_distance_final', replace
		
		// now merge back in the distance variable.
		use "${Data_Clean}/propensity_matched_CBG_list_nn1.dta", clear


		merge 1:1 colid colid1 using `psmatch_distance_final', keep(3) nogen
		foreach var in `reshapevars' {
			ren `var' `var'0
		}
		
		gen psmatch_pair = _n
		reshape long `reshapevars', i(psmatch_pair distance_psmatch) j(match)
		isid psmatch_pair cbg
	
		gen years = 4
		gen months = 12
		expand years
		gen year = 2013
		
		bys psmatch_pair cbg: gen n = _n
		replace year = year + n
		expand months
		bys psmatch_pair cbg year: gen month = _n 
		assert month <=12
		drop years months n
		isid psmatch_pair cbg year month 
		merge m:1 cbg year month using "${Data_Clean}/Experian_at_bg_level.dta", keep(1 3)
		merge m:1 cbg year month using "${Data_Clean}/alt_stations_panel.dta", keep(1 3) gen(_merge_station) 
		merge m:1 eiaid year month using `electricity_panel', keep(1 3) nogen
	
		merge m:1 cbg using `bg_PGETerritory', keep(1 3) nogen
		
		// region code only applies for now to PGE
		replace region = "" if eiaid != 14328
		foreach var of varlist Tier?UsageAmt MaxTierUsageAmt pastlastupdate	{
			foreach letter in P R S T V X {
				replace `var' = `var'_`letter' if region == "`letter'" & eiaid == 14328 
			}
		}	
		drop AvgRateTier?_? Tier?UsageAmt_? MaxTierUsageAmt_? HighestTierAvgRate_? pastlastupdate_?
		la var region "utility region (PGE)"
		
		// now merge gas price data etc
		merge m:1 cbg using `CBG_climate_zone', keep(1 3) nogen
		forvalues i = 0/3 {
			replace Tier`i'UsageAmt =  Tier`i'UsageAmt_Coastal if utility == "San Diego Gas & Electric" & (climate_zone == "6" | climate_zone == "8")
			replace Tier`i'UsageAmt =  Tier`i'UsageAmt_Inland if utility == "San Diego Gas & Electric" & climate_zone == "10"
			replace Tier`i'UsageAmt =  Tier`i'UsageAmt_Mountain if utility == "San Diego Gas & Electric" & climate_zone == "14"
			replace Tier`i'UsageAmt =  Tier`i'UsageAmt_Desert if utility == "San Diego Gas & Electric" & climate_zone == "15"
		}
		drop Tier?UsageAmt_Coastal Tier?UsageAmt_Inland Tier?UsageAmt_Mountain Tier?UsageAmt_Desert
				
		merge m:1 zipcode_primary year month using `GasData', keep(1 3) nogen
		merge m:1 cbg using `CBG_dmv_2013', keep(1 3) nogen
		merge m:1 cbg using `CBG_MUDs', keep(1 3) nogen
		compress		
		
		drop station_count
		drop _merge* fixedchargefirstmeter
		
		la var psmatch_pair "ID for a propensity score matched pair"
		la var match "1 if the CBG is the PS match of a given psmatch_pair, 0 if reference"
		la var cbg "Census Block Group ID"
		la var rowid "utility ID (row order) in the utility shapefile"
		la var colid "CBG ID (column order) in the CBG shapefile"
		la var mean_income "mean household income"
		la var population "population"
		la var pop_density "population/sq. mile"
		la var eiaid "EIA's utility number"
		la var year "Year"
		la var month "Month"
		la var HighestTierAvgRate "Weekday avearge top tier rate"
		la var zipcode_primary "Zipcode with which CBG has greatest overlap in area"
		forvalues i = 0/4 {
			la var Tier`i'UsageAmt "KWh/month used to get to tier `i'"
		}
		la var MaxTierUsageAmt "KWh/month used to get to the top tier"
		la var bay_area_muni "1 if Bay Area muni"
		la var greater_la_muni "1 if Greater LA muni (exc. LADWP)"
		la var central_valley_muni "1 if Central Valley muni"
		la var utility_name "Utility name"
		la var cbg_name "CBG name"
		la var total_station "Number of EV station (cross section)"
		la var manually_collected "Utility rates manually collected"		
		save "${Data_Clean}/psmatched_panel_EV_utility.dta", replace
}	
	
	
	